### Replication code
### Article: "Turnover: How lame-duck governments disrupt the bureaucracy and service delivery before leaving office"
### Author: Guillermo Toral (www.guillermotoral.com)
### Date: July 2023
### This file prepares the dataset on municipal healthcare service provision
### This file uses the DATASUS healthcare service and personnel files available under datasets > healthcare. Download instructions are in the README file.
### R version, platform, and package versions reported at the end of the file

# Prepare the environment -------------------------------------------------

### This section of the code prepares the environment 

# Clean the environment
rm(list = ls())

# Install required packages if not previously installed
package_list <- c("tidyverse", "here") 
packages_to_install <- package_list[!(package_list %in% installed.packages()[,"Package"])]
if(length(packages_to_install)>0){
  install.packages(packages_to_install)
}

# Load required packages
library(tidyverse)
library(here)

# Set Working Directory to wherever this file is located.
setwd(here())

# Clean DATASUS files ------------------------------------------------------

# Healthcare data are from DATASUS, the data portal of Brazil's Ministry of Healthcare.
# See download instructions in the README file

# Remove initial rows with information about variable and years
# Replace "-" with 0, as per the legend at the bottom of TABNET tables; replacing "-" with NA instead returns similar results
# Remove data for years 1998-2003, when SIAB only informs data aggregated at the year level

months <- c("jan", "feb", "mar" ,"apr", "may", "jun", "jul", "ago", "sep", "oct", "nov", "dec")
years <- as.character(c(2004:2015))
month_year <- expand.grid(months, years)
siab_column_names <- c("municipio", "all_1998", "all_1999", "all_2000", "all_2001", "all_2002", "all_2003", paste0(month_year$Var1, "_", month_year$Var2))

visits_acs <- read_delim("../../datasets/downloaded/healthcare/siab/siab_visits_acs.csv",delim=";",skip=4,col_names=siab_column_names)
visits_acs <- visits_acs %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

visits_nurses <- read_delim("../../datasets/downloaded/healthcare/siab/siab_visits_nurses.csv",delim=";",skip=4,col_names=siab_column_names)
visits_nurses <- visits_nurses %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

visits_doctors <- read_delim("../../datasets/downloaded/healthcare/siab/siab_visits_doctors.csv",delim=";",skip=4,col_names=siab_column_names)
visits_doctors <- visits_doctors %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

prenatal <- read_delim("../../datasets/downloaded/healthcare/siab/siab_prenatal.csv",delim=";",skip=4,col_names=siab_column_names)
prenatal <- prenatal %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

consultations_infants <- read_delim("../../datasets/downloaded/healthcare/siab/siab_consultations_infants.csv",delim=";",skip=4,col_names=siab_column_names)
consultations_infants <- consultations_infants %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

consultations_children <- read_delim("../../datasets/downloaded/healthcare/siab/siab_consultations_children.csv",delim=";",skip=4,col_names=siab_column_names)
consultations_children <- consultations_children %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

infants_vaccinated <- read_delim("../../datasets/downloaded/healthcare/siab/siab_vaccinations_0-1.csv",delim=";",skip=4,col_names=siab_column_names)
infants_vaccinated <- infants_vaccinated %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

pregnancies_vaccinated <- read_delim("../../datasets/downloaded/healthcare/siab/siab_pregnancies_vaccinations.csv",delim=";",skip=4,col_names=siab_column_names)
pregnancies_vaccinated <- pregnancies_vaccinated %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

births <- read_delim("../../datasets/downloaded/healthcare/siab/siab_births.csv",delim=";",skip=4,col_names=siab_column_names)
births <- births %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

births_lowweight <- read_delim("../../datasets/downloaded/healthcare/siab/siab_lifebirths_weight_below_2500g.csv",delim=";",skip=4,col_names=siab_column_names)
births_lowweight <- births_lowweight %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

babies_below1yr <- read_delim("../../datasets/downloaded/healthcare/siab/siab_babies_below1yr.csv",delim=";",skip=4,col_names=siab_column_names)
babies_below1yr <- babies_below1yr %>%
  mutate_at(vars(2:151),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio, all_1998, all_1999, all_2000, all_2001, all_2002, all_2003)) %>%
  relocate(cod_ibge)

months <- c("jan", "feb", "mar" ,"apr", "may", "jun", "jul", "ago", "sep", "oct", "nov", "dec")
years_1 <- as.character(c(2008:2015))
month_year_1 <- expand.grid(months, years_1)
month_year_1 <- month_year_1[1:94,]
cnes_column_names_1 <- c("municipio", "ago_2007", "sep_2007", "oct_2007", "nov_2007", "dec_2007", paste0(month_year_1$Var1, "_", month_year_1$Var2))

years_2 <- as.character(c(2016:2021))
month_year_2 <- expand.grid(months, years_2)
month_year_2 <- month_year_2[1:66,]
cnes_column_names_2 <- c("municipio", "nov_2015", "dec_2015", paste0(month_year_2$Var1, "_", month_year_2$Var2))

personnel_all_1 <- read_delim("../../datasets/downloaded/healthcare/cnes/cnes_personnel_all_2007-15.csv",delim=";",skip=5,col_names=cnes_column_names_1)
personnel_all_1 <- personnel_all_1 %>%
  mutate_at(vars(2:100),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio)) %>%
  relocate(cod_ibge)
personnel_all_1 <- personnel_all_1[which(!is.na(personnel_all_1$cod_ibge)),]

personnel_all_2 <- read_delim("../../datasets/downloaded/healthcare/cnes/cnes_personnel_all_2015-21.csv",delim=";",skip=5,col_names=cnes_column_names_2)
personnel_all_2 <- personnel_all_2 %>%
  mutate_at(vars(2:69),list(~as.numeric(str_replace_all(.,'-','0')))) %>%
  mutate(cod_ibge = as.numeric(gsub("([0-9]+).*$", "\\1", municipio))) %>%
  select(-c(municipio)) %>%
  relocate(cod_ibge)
personnel_all_2 <- personnel_all_2[which(!is.na(personnel_all_2$cod_ibge)),]

nrow(personnel_all_1)
nrow(personnel_all_2)

personnel_all <- left_join(personnel_all_1, personnel_all_2)

# Build healthcare dataset ----------------------------------------------

e <- read_csv("../../datasets/downloaded/other/basedosdados_municipality_identifiers.csv") %>%
  mutate(cod_ibge = id_municipio_6) %>%
  select(cod_ibge)

election_years <- c(2004, 2008, 2012, 2016)
postelection_years <- election_years + 1
q13_months <- c("jan", "feb", "mar")
q14_months <- c("apr", "may", "jun")
q15_months <- c("jul", "ago", "sep")
q16_months <- c("oct", "nov", "dec")
q1_months <- c("jan", "feb", "mar")

# Home visits by community health agents (ACS in portuguese)

pp <- visits_acs %>%
  gather(colnames(visits_acs)[2:145], key=Period, value=acs_visits) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), acs_visits = as.numeric(acs_visits))

acs_visits_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "acs_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(acs_visits_q14=sum(acs_visits))
acs_visits_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "acs_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(acs_visits_q15=sum(acs_visits))
acs_visits_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "acs_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(acs_visits_q16=sum(acs_visits))
acs_visits_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "acs_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(acs_visits_q1=sum(acs_visits))
acs_visits_q1$year <- acs_visits_q1$year-1

e <- left_join(e, acs_visits_q14)
e <- left_join(e, acs_visits_q15)
e <- left_join(e, acs_visits_q16)
e <- left_join(e, acs_visits_q1)

# Home visits by nurses

pp <- visits_nurses %>%
  gather(colnames(visits_nurses)[2:145], key=Period, value=nurse_visits) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), nurse_visits = as.numeric(nurse_visits))

nurse_visits_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "nurse_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(nurse_visits_q14=sum(nurse_visits))
nurse_visits_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "nurse_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(nurse_visits_q15=sum(nurse_visits))
nurse_visits_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "nurse_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(nurse_visits_q16=sum(nurse_visits))
nurse_visits_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "nurse_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(nurse_visits_q1=sum(nurse_visits))
nurse_visits_q1$year <- nurse_visits_q1$year-1

e <- left_join(e, nurse_visits_q14)
e <- left_join(e, nurse_visits_q15)
e <- left_join(e, nurse_visits_q16)
e <- left_join(e, nurse_visits_q1)

# Home visits by doctors

pp <- visits_doctors %>%
  gather(colnames(visits_doctors)[2:145], key=Period, value=doctor_visits) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), doctor_visits = as.numeric(doctor_visits))

doctor_visits_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "doctor_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(doctor_visits_q14=sum(doctor_visits))
doctor_visits_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "doctor_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(doctor_visits_q15=sum(doctor_visits))
doctor_visits_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "doctor_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(doctor_visits_q16=sum(doctor_visits))
doctor_visits_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "doctor_visits")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(doctor_visits_q1=sum(doctor_visits))
doctor_visits_q1$year <- doctor_visits_q1$year-1

e <- left_join(e, doctor_visits_q14)
e <- left_join(e, doctor_visits_q15)
e <- left_join(e, doctor_visits_q16)
e <- left_join(e, doctor_visits_q1)

# Prenatal care checkups

pp <- prenatal %>%
  gather(colnames(prenatal)[2:145], key=Period, value=prenatal_checkups) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), prenatal_checkups = as.numeric(prenatal_checkups))

prenatal_checkups_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "prenatal_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(prenatal_checkups_q14=sum(prenatal_checkups))
prenatal_checkups_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "prenatal_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(prenatal_checkups_q15=sum(prenatal_checkups))
prenatal_checkups_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "prenatal_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(prenatal_checkups_q16=sum(prenatal_checkups))
prenatal_checkups_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "prenatal_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(prenatal_checkups_q1=sum(prenatal_checkups))
prenatal_checkups_q1$year <- prenatal_checkups_q1$year-1

e <- left_join(e, prenatal_checkups_q14)
e <- left_join(e, prenatal_checkups_q15)
e <- left_join(e, prenatal_checkups_q16)
e <- left_join(e, prenatal_checkups_q1)

# Medical consultations with infants

pp <- consultations_infants %>%
  gather(colnames(consultations_infants)[2:145], key=Period, value=infant_checkups) %>%
  separate(Period, c("month", "year")) %>%
  dplyr::mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), infant_checkups = as.numeric(infant_checkups))

infant_checkups_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "infant_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(infant_checkups_q14=sum(infant_checkups))
infant_checkups_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "infant_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(infant_checkups_q15=sum(infant_checkups))
infant_checkups_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "infant_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(infant_checkups_q16=sum(infant_checkups))
infant_checkups_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "infant_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(infant_checkups_q1=sum(infant_checkups))
infant_checkups_q1$year <- infant_checkups_q1$year-1

e <- left_join(e, infant_checkups_q14)
e <- left_join(e, infant_checkups_q15)
e <- left_join(e, infant_checkups_q16)
e <- left_join(e, infant_checkups_q1)

# Medical consultations with children

pp <- consultations_children %>%
  gather(colnames(consultations_children)[2:145], key=Period, value=child_checkups) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), child_checkups = as.numeric(child_checkups))

child_checkups_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "child_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(child_checkups_q14=sum(child_checkups))
child_checkups_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "child_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(child_checkups_q15=sum(child_checkups))
child_checkups_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "child_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(child_checkups_q16=sum(child_checkups))
child_checkups_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "child_checkups")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(child_checkups_q1=sum(child_checkups))
child_checkups_q1$year <- child_checkups_q1$year-1

e <- left_join(e, child_checkups_q14)
e <- left_join(e, child_checkups_q15)
e <- left_join(e, child_checkups_q16)
e <- left_join(e, child_checkups_q1)

# Pregnant women with their vaccines up to date

pp <- pregnancies_vaccinated %>%
  gather(colnames(pregnancies_vaccinated)[2:145], key=Period, value=pregnancies_vaccinated) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), pregnancies_vaccinated = as.numeric(pregnancies_vaccinated))

pregnancies_vaccinated_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "pregnancies_vaccinated")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(pregnancies_vaccinated_q14=sum(pregnancies_vaccinated))
pregnancies_vaccinated_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "pregnancies_vaccinated")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(pregnancies_vaccinated_q15=sum(pregnancies_vaccinated))
pregnancies_vaccinated_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "pregnancies_vaccinated")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(pregnancies_vaccinated_q16=sum(pregnancies_vaccinated))
pregnancies_vaccinated_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "pregnancies_vaccinated")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(pregnancies_vaccinated_q1=sum(pregnancies_vaccinated))
pregnancies_vaccinated_q1$year <- pregnancies_vaccinated_q1$year-1

e <- left_join(e, pregnancies_vaccinated_q14)
e <- left_join(e, pregnancies_vaccinated_q15)
e <- left_join(e, pregnancies_vaccinated_q16)
e <- left_join(e, pregnancies_vaccinated_q1)

# Infants with their vaccines up to date

pp <- infants_vaccinated %>%
  gather(colnames(infants_vaccinated)[2:145], key=Period, value=infants_vaccinated) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), infants_vaccinated = as.numeric(infants_vaccinated))

infants_vaccinated_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "infants_vaccinated")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(infants_vaccinated_q14=sum(infants_vaccinated))
infants_vaccinated_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "infants_vaccinated")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(infants_vaccinated_q15=sum(infants_vaccinated))
infants_vaccinated_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "infants_vaccinated")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(infants_vaccinated_q16=sum(infants_vaccinated))
infants_vaccinated_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "infants_vaccinated")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(infants_vaccinated_q1=sum(infants_vaccinated))
infants_vaccinated_q1$year <- infants_vaccinated_q1$year-1

e <- left_join(e, infants_vaccinated_q14)
e <- left_join(e, infants_vaccinated_q15)
e <- left_join(e, infants_vaccinated_q16)
e <- left_join(e, infants_vaccinated_q1)

# Births

pp <- births %>%
  gather(colnames(births)[2:145], key=Period, value=births) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), births = as.numeric(births))

births_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "births")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(births_q14=sum(births))
births_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "births")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(births_q15=sum(births))
births_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "births")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(births_q16=sum(births))
births_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "births")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(births_q1=sum(births))
births_q1$year <- births_q1$year-1

e <- left_join(e, births_q14)
e <- left_join(e, births_q15)
e <- left_join(e, births_q16)
e <- left_join(e, births_q1)

# Births with low weight

pp <- births_lowweight %>%
  gather(colnames(births_lowweight)[2:145], key=Period, value=births_lowweight) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), births_lowweight = as.numeric(births_lowweight))

births_lowweight_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "births_lowweight")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(births_lowweight_q14=sum(births_lowweight))
births_lowweight_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "births_lowweight")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(births_lowweight_q15=sum(births_lowweight))
births_lowweight_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "births_lowweight")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(births_lowweight_q16=sum(births_lowweight))
births_lowweight_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "births_lowweight")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(births_lowweight_q1=sum(births_lowweight))
births_lowweight_q1$year <- births_lowweight_q1$year-1

e <- left_join(e, births_lowweight_q14)
e <- left_join(e, births_lowweight_q15)
e <- left_join(e, births_lowweight_q16)
e <- left_join(e, births_lowweight_q1)

# Babies below 1 year

pp <- babies_below1yr %>%
  gather(colnames(babies_below1yr)[2:145], key=Period, value=babies_below1yr) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), babies_below1yr = as.numeric(babies_below1yr))

babies_below1yr_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "babies_below1yr")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(babies_below1yr_q14=sum(babies_below1yr))
babies_below1yr_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "babies_below1yr")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(babies_below1yr_q15=sum(babies_below1yr))
babies_below1yr_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "babies_below1yr")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(babies_below1yr_q16=sum(babies_below1yr))
babies_below1yr_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "babies_below1yr")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(babies_below1yr_q1=sum(babies_below1yr))
babies_below1yr_q1$year <- babies_below1yr_q1$year-1

e <- left_join(e, babies_below1yr_q14)
e <- left_join(e, babies_below1yr_q15)
e <- left_join(e, babies_below1yr_q16)
e <- left_join(e, babies_below1yr_q1)

# Healthcare personnel

pp <- personnel_all %>%
  gather(colnames(personnel_all)[2:168], key=Period, value=personnel_all) %>%
  separate(Period, c("month", "year")) %>%
  mutate(cod_ibge = as.numeric(cod_ibge), year = as.numeric(year), personnel_all = as.numeric(personnel_all))

personnel_all_q13 <- pp[which(pp$year %in% election_years & pp$month %in% q13_months),c("cod_ibge", "year", "personnel_all")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(personnel_all_q13=sum(personnel_all))
personnel_all_q14 <- pp[which(pp$year %in% election_years & pp$month %in% q14_months),c("cod_ibge", "year", "personnel_all")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(personnel_all_q14=sum(personnel_all))
personnel_all_q15 <- pp[which(pp$year %in% election_years & pp$month %in% q15_months),c("cod_ibge", "year", "personnel_all")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(personnel_all_q15=sum(personnel_all))
personnel_all_q16 <- pp[which(pp$year %in% election_years & pp$month %in% q16_months),c("cod_ibge", "year", "personnel_all")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(personnel_all_q16=sum(personnel_all))
personnel_all_q1 <- pp[which(pp$year %in% postelection_years & pp$month %in% q1_months),c("cod_ibge", "year", "personnel_all")] %>%
  group_by(cod_ibge, year) %>%
  dplyr::summarize(personnel_all_q1=sum(personnel_all))
personnel_all_q1$year <- personnel_all_q1$year-1

e <- left_join(e, personnel_all_q13)
e <- left_join(e, personnel_all_q14)
e <- left_join(e, personnel_all_q15)
e <- left_join(e, personnel_all_q16)
e <- left_join(e, personnel_all_q1)


# Generate variables with changes in the stock of healthcare personnel --------

e <- e %>%
  mutate(flow_healthcare_personnel_q14 = personnel_all_q14 - personnel_all_q13,
         flow_healthcare_personnel_q15 = personnel_all_q15 - personnel_all_q14,
         flow_healthcare_personnel_q16 = personnel_all_q16 - personnel_all_q15,
         flow_healthcare_personnel_q1 = personnel_all_q1 - personnel_all_q16)

# Remove superfluous variables --------------------------------------------

e <- e %>%
  dplyr::select(-c(personnel_all_q13, personnel_all_q14, personnel_all_q15, personnel_all_q16, personnel_all_q1))

# Export data -------------------------------------------------------------

write_csv(e, "../../datasets/analysis/healthcare_data.csv")

# Notes: R version, platform, and loaded packages -------------------------

sessionInfo(package = NULL)

# R version 4.2.1 (2022-06-23)
# Platform: aarch64-apple-darwin20 (64-bit)
# Running under: macOS Monterey 12.1
# 
# Matrix products: default
# LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
# 
# locale:
#   [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
# 
# attached base packages:
#   [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
#   [1] here_1.0.1      forcats_0.5.2   stringr_1.5.0   dplyr_1.1.2     purrr_0.3.4    
# [6] readr_2.1.2     tidyr_1.2.0     tibble_3.2.1    ggplot2_3.3.6   tidyverse_1.3.2
# 
# loaded via a namespace (and not attached):
#   [1] pillar_1.9.0        compiler_4.2.1      cellranger_1.1.0    dbplyr_2.2.1       
# [5] tools_4.2.1         lubridate_1.8.0     jsonlite_1.8.0      googledrive_2.0.0  
# [9] lifecycle_1.0.3     gargle_1.2.0        gtable_0.3.1        pkgconfig_2.0.3    
# [13] rlang_1.1.1         reprex_2.0.2        DBI_1.1.3           cli_3.6.1          
# [17] rstudioapi_0.14     haven_2.5.1         xml2_1.3.3          withr_2.5.0        
# [21] httr_1.4.4          generics_0.1.3      vctrs_0.6.2         fs_1.5.2           
# [25] hms_1.1.2           rprojroot_2.0.3     googlesheets4_1.0.1 grid_4.2.1         
# [29] tidyselect_1.2.0    glue_1.6.2          R6_2.5.1            fansi_1.0.4        
# [33] readxl_1.4.1        tzdb_0.3.0          modelr_0.1.9        magrittr_2.0.3     
# [37] backports_1.4.1     scales_1.2.1        ellipsis_0.3.2      rvest_1.0.3        
# [41] assertthat_0.2.1    colorspace_2.0-3    utf8_1.2.3          stringi_1.7.12     
# [45] munsell_0.5.0       broom_1.0.1         crayon_1.5.1   